\documentclass[12pt]{extarticle}
%Some packages I commonly use.
\usepackage[english]{babel}
\usepackage{ctex}
\usepackage{graphicx}
\usepackage{framed}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{enumerate}
\usepackage[utf8]{inputenc}
\usepackage[top=1 in,bottom=1in, left=1 in, right=1 in]{geometry}

%A bunch of definitions that make my life easier
\newcommand{\matlab}{{\sc Matlab} }
\newcommand{\cvec}[1]{{\mathbf #1}}
\newcommand{\rvec}[1]{\vec{\mathbf #1}}
\newcommand{\ihat}{\hat{\textbf{\i}}}
\newcommand{\jhat}{\hat{\textbf{\j}}}
\newcommand{\khat}{\hat{\textbf{k}}}
\newcommand{\minor}{{\rm minor}}
\newcommand{\trace}{{\rm trace}}
\newcommand{\spn}{{\rm Span}}
\newcommand{\rem}{{\rm rem}}
\newcommand{\ran}{{\rm range}}
\newcommand{\range}{{\rm range}}
\newcommand{\mdiv}{{\rm div}}
\newcommand{\proj}{{\rm proj}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\<}{\langle}
\renewcommand{\>}{\rangle}
\renewcommand{\emptyset}{\varnothing}
\newcommand{\attn}[1]{\textbf{#1}}
\theoremstyle{definition}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
\newtheorem*{definition}{Definition}
\newtheorem*{example}{Example}
\newtheorem*{note}{Note}
\newtheorem{exercise}{Exercise}
\newcommand{\bproof}{\bigskip {\bf Proof. }}
\newcommand{\eproof}{\hfill\qedsymbol}
\newcommand{\Disp}{\displaystyle}
\newcommand{\qe}{\hfill\(\bigtriangledown\)}
\setlength{\columnseprule}{1 pt}


\title{Quasi-Newton Method}
\author{eleve11}
\date{7.8 2019}

\begin{document}

\maketitle

\section{BFGS}
BFGS是一种流行的拟牛顿法的算法。是以Broyden, Fletcher, Goldfarb, Shanno这四位研究者名字首字母命名的。当然这一切的来源是函数的泰勒展开式。

\begin{equation}
  m_k(p) = f_k + \nabla f_k^T p + \frac{1}{2}p^T B_k p
\end{equation}
其中$B_k$是一个对称的正定矩阵，是Hessian矩阵的近似（由公式(2)可知，$B_k^{-1}$近似的是Hessian矩阵的逆）。$p = \Delta x = x - x_k$。

\begin{equation}\label{equation:optimizer}
  p_k = -B_k^{-1} \nabla f_k
\end{equation}
《Numerical Optimization》书中的表达如上，下面推导使用$f(x)$。设存在需要优化的函数$f(x)$，对其进行泰勒二阶展开得
\begin{equation}
  f(x) = f(x_k) + f^\prime(x_k)(x-x_k) + \frac{1}{2}f^{\prime \prime}(x_k)(x-x_k)^2
\end{equation}
此时使用$f(x)$的泰勒展开式的极值点近似$f(x)$的极值点
\begin{equation}
  f^\prime(x) = f^\prime(x_k) + f^{\prime \prime}(x_k)(x-x_k) = 0
\end{equation}
\begin{equation}
  \Delta x = x-x_k = -\frac{f^\prime(x_k)}{f^{\prime \prime}(x_k)}
\end{equation}
换成矩阵形式，一阶即Gradient矩阵，二阶即Hessian矩阵
\begin{equation}
  f(x) = f(x_k) + G^T \Delta x + \frac{1}{2} \Delta x^T H \Delta x
\end{equation}
\begin{equation} \label{equation:optimizer1}
  \Delta x = x - x_k = - H^{-1} G
\end{equation}
以上就推导了在使用牛顿法进行优化时，公式~\eqref{equation:optimizer}与~\eqref{equation:optimizer1}代表着优化前进的方向。两个公式并无差别：一个是以$x$为自变量的函数$f(x)$，一个是以$p = \Delta x$为自变量的函数$m_k(p)$，两个函数的关系为$m_k(0) = f(x_k)$。

每次优化迭代过程
\begin{equation}
  x_{k+1} = x_k + \alpha _k p_k
\end{equation}
迭代的步长$\alpha _k$需要满足Wolfe conditions。$B_k$是Hessian矩阵的近似。

不是每次迭代都重新去计算矩阵$B_k$，经过一次迭代得到
\begin{equation}
  m_{k+1}(p) = f_{k+1} + \nabla f_{k+1}^T p + \frac{1}{2}p^T B_{k+1} p
\end{equation}
此时矩阵$B_{k+1}$需要满足什么条件，才能继续迭代下去？

首先，$m_{k+1}$在两次迭代$x_k$和$x_{k+1}$之间的梯度与目标函数$f(x)$的梯度匹配
\begin{equation}
  \nabla m_{k+1}(-\alpha _k p_k) = \nabla f_{k+1} - \alpha _k B_{k+1} p_k = \nabla f_k
\end{equation}
移项可得
\begin{equation}
  B_{k+1} \alpha _k p_k = \nabla f_{k+1} - \nabla f_k
\end{equation}
令$s_k = x_{k+1} - x_k = \alpha _k p_k,\quad y_k = \nabla f_{k+1} - \nabla f_k$，可简化得到secant equation
\begin{equation} \label{equation:secant}
  B_{k+1} s_k = y_k
\end{equation}
当满足curvature condition时，即$s_k^T y_k > 0$，等式~\eqref{equation:secant}要求对称正定矩阵$B_{k+1}$将$s_k$映射到$y_k$。

\section{Induction Proofs}
\subsection{Ordinary Induction}
\begin{exercise} Prove, for all natural numbers $n$, that
\begin{equation} \sum_{k=0}^n k = 1 + 2 + 3 + \cdots + n = \dfrac{n(n+1)}{2}
\label{eq:Pn}\end{equation}
\end{exercise}

\begin{proof}
We prove this by induction on $n\in\N$.  In the base case, $n=0$, and \eqref{eq:Pn} becomes
\[\sum_{k=0}^n k = \sum_{k=0}^0 k = 0 = \dfrac{0(1)}{2} = \dfrac{n(n+1)}{2}\]

Now, let $n>0$ be arbitrary, and assume \eqref{eq:Pn}.
We show $\displaystyle \sum_{k=0}^{n+1} k = \dfrac{(n+1)(n+2)}{2}$.
To that end note
\begin{align*}
	\sum_{k=0}^{n+1} k &= \left(\sum_{k=0}^{n} k\right) + (n+1) &\mbox{(sum definition)}\\
	&= \frac{n(n+1)}{2} + (n+1) &\mbox{(induction hypothesis)}
	\\
	&= \frac{n(n+1)}{2} + \frac{2n+2}{2} &\mbox{(common denominator)}
	\\
	&= \frac{n^2 +n}{2} + \frac{2n+2}{2} &\mbox{(distribute)}
	\\
	&= \frac{n^2 +3n + 2}{2} &\mbox{(combine like terms)}
	\\
	&= \frac{(n+1)(n+2)}{2} & \mbox{(factor the numerator)}
\end{align*}

In all cases, \eqref{eq:Pn} is true, so $\forall n\in \N$,
$\Disp \sum_{k=0}^n k = \dfrac{n(n+1)}{2}$
\end{proof}



\end{document}
